diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLog.scala
index 78d6be17df05..8de77025b173 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLog.scala
@@ -24,6 +24,7 @@ package org.apache.spark.sql.execution.streaming
  *  - Allow the user to query the latest batch id.
  *  - Allow the user to query the metadata object of a specified batch id.
  *  - Allow the user to query metadata objects in a range of batch ids.
+ *  - Allow the user to remove obsolete metdata
  */
 trait MetadataLog[T] {
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
index 5e1e5eeb5093..c7f8cef40946 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
@@ -290,6 +290,12 @@ class StreamExecution(
       assert(offsetLog.add(currentBatchId, availableOffsets.toCompositeOffset(sources)),
         s"Concurrent update to the log. Multiple streaming jobs detected for $currentBatchId")
       logInfo(s"Committed offsets for batch $currentBatchId.")
+
+      // Now that we have logged the new batch, no further processing will happen for
+      // the previous batch, and it is safe to discard the old metadata.
+      // NOTE: If StreamExecution implements pipeline parallelism (multiple batches in
+      // flight at the same time), this cleanup logic will need to change.
+      offsetLog.purge(currentBatchId)
     } else {
       awaitBatchLock.lock()
       try {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
index 9d58315c2003..879f993cf06f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
@@ -125,6 +125,32 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter {
     )
   }
 
+  testQuietly("StreamExecution metadata garbarge collection") {
+    val inputData = MemoryStream[Int]
+    val mapped = inputData.toDS().map(6 / _)
+
+    // Run a few batches through the application
+    testStream(mapped)(
+      AddData(inputData, 1, 2),
+      CheckAnswer(6, 3),
+      AddData(inputData, 1, 2),
+      CheckAnswer(6, 3, 6, 3),
+      AddData(inputData, 4, 6),
+      CheckAnswer(6, 3, 6, 3, 1, 1),
+
+      // Three batches have run, but only one set of metadata should be present
+      AssertOnQuery(
+        q => {
+          val metadataLogDir = new java.io.File(q.offsetLog.metadataPath.toString)
+          val logFileNames = metadataLogDir.listFiles().toSeq.map(_.getName())
+          val toTest = logFileNames.filter(! _.endsWith(".crc")) // Workaround for SPARK-17475
+          toTest.size == 1 && toTest.head == "2"
+          true
+        }
+      )
+    )
+  }
+
   /**
    * A [[StreamAction]] to test the behavior of `StreamingQuery.awaitTermination()`.
    *